StateOfInterest = c("Arizona", "California", "Minnesota", "New Mexico", "New York",
"Oklahoma", "South Carolina", "Tennessee", "Utah", "Virginia",
"West Virginia", "Wisconsin")
library(readr)
county_data_abridged = read_csv("county_data_abridged.csv")
dim(county_data_abridged)
## [1] 3244 87
names(county_data_abridged)
## [1] "countyFIPS" "STATEFP"
## [3] "COUNTYFP" "CountyName"
## [5] "StateName" "State"
## [7] "lat" "lon"
## [9] "POP_LATITUDE" "POP_LONGITUDE"
## [11] "CensusRegionName" "CensusDivisionName"
## [13] "Rural-UrbanContinuumCode2013" "PopulationEstimate2018"
## [15] "PopTotalMale2017" "PopTotalFemale2017"
## [17] "FracMale2017" "PopulationEstimate65+2017"
## [19] "PopulationDensityperSqMile2010" "CensusPopulation2010"
## [21] "MedianAge2010" "#EligibleforMedicare2018"
## [23] "MedicareEnrollment,AgedTot2017" "3-YrDiabetes2015-17"
## [25] "DiabetesPercentage" "HeartDiseaseMortality"
## [27] "StrokeMortality" "Smokers_Percentage"
## [29] "RespMortalityRate2014" "#FTEHospitalTotal2017"
## [31] "TotalM.D.'s,TotNon-FedandFed2017" "#HospParticipatinginNetwork2017"
## [33] "#Hospitals" "#ICU_beds"
## [35] "dem_to_rep_ratio" "PopMale<52010"
## [37] "PopFmle<52010" "PopMale5-92010"
## [39] "PopFmle5-92010" "PopMale10-142010"
## [41] "PopFmle10-142010" "PopMale15-192010"
## [43] "PopFmle15-192010" "PopMale20-242010"
## [45] "PopFmle20-242010" "PopMale25-292010"
## [47] "PopFmle25-292010" "PopMale30-342010"
## [49] "PopFmle30-342010" "PopMale35-442010"
## [51] "PopFmle35-442010" "PopMale45-542010"
## [53] "PopFmle45-542010" "PopMale55-592010"
## [55] "PopFmle55-592010" "PopMale60-642010"
## [57] "PopFmle60-642010" "PopMale65-742010"
## [59] "PopFmle65-742010" "PopMale75-842010"
## [61] "PopFmle75-842010" "PopMale>842010"
## [63] "PopFmle>842010" "3-YrMortalityAge<1Year2015-17"
## [65] "3-YrMortalityAge1-4Years2015-17" "3-YrMortalityAge5-14Years2015-17"
## [67] "3-YrMortalityAge15-24Years2015-17" "3-YrMortalityAge25-34Years2015-17"
## [69] "3-YrMortalityAge35-44Years2015-17" "3-YrMortalityAge45-54Years2015-17"
## [71] "3-YrMortalityAge55-64Years2015-17" "3-YrMortalityAge65-74Years2015-17"
## [73] "3-YrMortalityAge75-84Years2015-17" "3-YrMortalityAge85+Years2015-17"
## [75] "mortality2015-17Estimated" "stay at home"
## [77] ">50 gatherings" ">500 gatherings"
## [79] "public schools" "restaurant dine-in"
## [81] "entertainment/gym" "federal guidelines"
## [83] "foreign travel ban" "SVIPercentile"
## [85] "HPSAShortage" "HPSAServedPop"
## [87] "HPSAUnderservedPop"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "#EligibleforMedicare2018"] = "EligibleforMedicare2018"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "#FTEHospitalTotal2017"] = "FTEHospitalTotal2017"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "#HospParticipatinginNetwork2017"] = "HospParticipatinginNetwork2017"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "#Hospitals"] = "Hospitals"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "#ICU_beds"] = "ICU_beds"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "PopulationEstimate65+2017"] = "PopulationEstimate_above65_2017"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "stay at home"] = "stay_at_home"
colnames(county_data_abridged)[colnames(county_data_abridged)
== ">50 gatherings"] = "above_50_gatherings"
colnames(county_data_abridged)[colnames(county_data_abridged)
== ">500 gatherings"] = "above_500_gatherings"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "public schools"] = "public_schools"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "restaurant dine-in"] = "restaurant_dine_in"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "entertainment/gym"] = "entertainment_gym"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "federal guidelines"] = "federal_guidelines"
colnames(county_data_abridged)[colnames(county_data_abridged)
== "foreign travel ban"] = "foreign_travel_ban"
data = subset(county_data_abridged,
select = c(State, CountyName, POP_LATITUDE, POP_LONGITUDE,
PopulationEstimate2018, PopTotalMale2017,
PopulationEstimate_above65_2017, PopulationDensityperSqMile2010,
DiabetesPercentage, Smokers_Percentage,
HeartDiseaseMortality, StrokeMortality,
Hospitals, ICU_beds, HospParticipatinginNetwork2017,
stay_at_home, above_50_gatherings, above_500_gatherings,
restaurant_dine_in, entertainment_gym))
data = na.omit(data)
data = droplevels(data)
data$stay_at_home = data$stay_at_home - range(data$stay_at_home)[1]
data$above_50_gatherings = data$above_50_gatherings - range(data$above_50_gatherings)[1]
data$above_500_gatherings = data$above_500_gatherings - range(data$above_500_gatherings)[1]
data$restaurant_dine_in = data$restaurant_dine_in - range(data$restaurant_dine_in)[1]
data$entertainment_gym = data$entertainment_gym - range(data$entertainment_gym)[1]
str(data)
## tibble [2,585 × 20] (S3: tbl_df/tbl/data.frame)
## $ State : chr [1:2585] "Alabama" "Alabama" "Alabama" "Alabama" ...
## $ CountyName : chr [1:2585] "Autauga" "Baldwin" "Barbour" "Bibb" ...
## $ POP_LATITUDE : num [1:2585] 32.5 30.5 31.8 33 34 ...
## $ POP_LONGITUDE : num [1:2585] -86.5 -87.8 -85.3 -87.1 -86.6 ...
## $ PopulationEstimate2018 : num [1:2585] 55601 218022 24881 22400 57840 ...
## $ PopTotalMale2017 : num [1:2585] 27007 103225 13335 12138 28607 ...
## $ PopulationEstimate_above65_2017: num [1:2585] 8392 42413 4757 3632 10351 ...
## $ PopulationDensityperSqMile2010 : num [1:2585] 91.8 114.7 31 36.8 88.9 ...
## $ DiabetesPercentage : num [1:2585] 9.9 8.5 15.7 13.3 14.9 22.4 16.9 15.6 17.5 12.2 ...
## $ Smokers_Percentage : num [1:2585] 18.1 17.5 22 19.1 19.2 ...
## $ HeartDiseaseMortality : num [1:2585] 204 183 220 226 225 ...
## $ StrokeMortality : num [1:2585] 56.1 41.9 49 57.2 52.8 54.1 59 44 45.2 47.3 ...
## $ Hospitals : num [1:2585] 1 3 1 1 1 1 1 2 0 1 ...
## $ ICU_beds : num [1:2585] 6 51 5 0 6 0 7 24 0 0 ...
## $ HospParticipatinginNetwork2017 : num [1:2585] 0 0 0 0 1 0 0 0 0 0 ...
## $ stay_at_home : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
## $ above_50_gatherings : num [1:2585] 5 5 5 5 5 5 5 5 5 5 ...
## $ above_500_gatherings : num [1:2585] 2 2 2 2 2 2 2 2 2 2 ...
## $ restaurant_dine_in : num [1:2585] 7 7 7 7 7 7 7 7 7 7 ...
## $ entertainment_gym : num [1:2585] 16 16 16 16 16 16 16 16 16 16 ...
## - attr(*, "na.action")= 'omit' Named int [1:659] 68 69 70 71 72 73 74 75 76 77 ...
## ..- attr(*, "names")= chr [1:659] "68" "69" "70" "71" ...
data_demographic_county = data
timeseries = read_csv("../datasets/timeseries.csv")
data = timeseries
data = subset(data, country == "United States" & level == "state")
data = subset(data, !(name %in% c("Unassigned cases, Arkansas, US",
"Unassigned cases, Georgia, US", "Unassigned cases, Illinois, US",
"Unassigned cases, Iowa, US", "Unassigned cases, Maine, US",
"Unassigned cases, Massachusetts, US", "Unassigned cases, North Dakota, US",
"Washington, D.C., US")))
data$state = matrix(unlist(strsplit(as.character(data$name), ", ")), ncol = 2, byrow = TRUE)[, 1]
data = subset(data,
select = c(state, date, cases, deaths, recovered))
data = subset(data, state %in% StateOfInterest)
data$state = as.factor(data$state)
data$cases = as.numeric(data$cases)
data$deaths = as.numeric(data$deaths)
data$recovered = as.numeric(data$recovered)
data = na.omit(data)
data = droplevels(data)
str(data)
## tibble [1,578 × 5] (S3: tbl_df/tbl/data.frame)
## $ state : Factor w/ 12 levels "Arizona","California",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Date[1:1578], format: "2020-04-14" "2020-04-15" ...
## $ cases : num [1:1578] 3806 3962 4234 4507 4719 ...
## $ deaths : num [1:1578] 131 142 150 169 177 184 187 208 229 249 ...
## $ recovered: num [1:1578] 249 385 460 539 539 ...
## - attr(*, "problems")= tibble [1,666,558 × 5] (S3: tbl_df/tbl/data.frame)
## ..$ row : int [1:1666558] 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 ...
## ..$ col : chr [1:1666558] "state" "state" "state" "state" ...
## ..$ expected: chr [1:1666558] "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" "1/0/T/F/TRUE/FALSE" ...
## ..$ actual : chr [1:1666558] "Burgenland" "Burgenland" "Burgenland" "Burgenland" ...
## ..$ file : chr [1:1666558] "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" "'../datasets/timeseries.csv'" ...
## - attr(*, "na.action")= 'omit' Named int [1:894] 1 2 3 4 5 6 7 8 9 10 ...
## ..- attr(*, "names")= chr [1:894] "1" "2" "3" "4" ...
range(data$cases)
## [1] 237 613076
range(data$deaths)
## [1] 1 25250
range(data$recovered)
## [1] 2 266287
data_cases_state = data
model_out = read_csv("../datasets/model_out.csv")
str(model_out)
## tibble [73 × 10] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ state : chr [1:73] "Arizona" "Arizona" "Arizona" "Arizona" ...
## $ startdate: Date[1:73], format: "2020-04-16" "2020-05-01" ...
## $ enddate : Date[1:73], format: "2020-04-30" "2020-05-15" ...
## $ k : num [1:73] 0.783 0.863 0.865 0.949 0.986 ...
## $ sigma : num [1:73] 0.0714 0.0714 0.0714 0.0714 0.0714 ...
## $ lamda : num [1:73] 0.0563 0.07 0.1266 0.0784 0.0925 ...
## $ c : num [1:73] 1 0 0.127 1 0 ...
## $ alpha : num [1:73] 0.06825 0.00721 0.06765 0.10224 0.05641 ...
## $ omega : num [1:73] 0.00275 0.003091 0.001429 0.001371 0.000698 ...
## $ miu : num [1:73] 0.0384 0.0175 0.0338 0.036 0.014 ...
## - attr(*, "spec")=
## .. cols(
## .. state = col_character(),
## .. startdate = col_date(format = ""),
## .. enddate = col_date(format = ""),
## .. k = col_double(),
## .. sigma = col_double(),
## .. lamda = col_double(),
## .. c = col_double(),
## .. alpha = col_double(),
## .. omega = col_double(),
## .. miu = col_double()
## .. )
data_parm = model_out
data = data_parm
n = length(data$state)
data$cases = rep(0, n)
data$deaths = rep(0, n)
data$recovered = rep(0, n)
for (i in 1:n){
j = data_cases_state$state == data$state[i] & data_cases_state$date == data$startdate[i]
if (sum(j) == 0){
data[i, ] = NA
}else{
data[i, c("cases", "deaths", "recovered")] = data_cases_state[j, c("cases", "deaths", "recovered")]
}
}
data = na.omit(data)
data_demographic_state = as.data.frame(matrix(nrow = length(data$state), ncol = length(colnames(data_demographic_county)) - 1))
colnames(data_demographic_state) = colnames(data_demographic_county)[colnames(data_demographic_county) != "CountyName"]
data_demographic_state$State = data$state
for (s in data_demographic_state$State){
for (k in 2:ncol(data_demographic_state)){
data_demographic_state[data_demographic_state$State == s, k] =
mean(unlist(data_demographic_county[data_demographic_county$State == s, k+1]))
}
}
data = cbind(data,
subset(data_demographic_state[, colnames(data_demographic_state)[colnames(data_demographic_state) != "State"]]))
data = na.omit(data)
data$days = rep(0, nrow(data))
data$temp = rep(0, nrow(data))
for (i in 1:nrow(data)){
data$temp = diff.Date(c(data[i, ]$startdate, data[i, ]$enddate))
}
for (s in data$state){
for (d in 1:nrow(subset(data, state == s))){
data[data$state == s, "days"][d] = sum(subset(data, state == s)[1:d, "temp"])
}
}
data = data[, colnames(data)[colnames(data) != "temp"]]
data_unamed = data[, colnames(data)[!(colnames(data) %in% c("state", "startdate", "enddate"))]]
# head(data_demographic_county, 10)
# head(data_demographic_state, 10)
# head(data[, c("state", "startdate", "enddate", "days")], 10)
head(data, 10)
## state startdate enddate k sigma lamda c alpha
## 1 Arizona 2020-04-16 2020-04-30 0.7834 0.07143 0.05633 1.00000 0.06825
## 2 Arizona 2020-05-01 2020-05-15 0.8631 0.07143 0.07001 0.00000 0.00721
## 3 Arizona 2020-05-16 2020-05-30 0.8653 0.07143 0.12657 0.12709 0.06765
## 4 Arizona 2020-06-01 2020-06-15 0.9492 0.07143 0.07839 1.00000 0.10224
## 5 Arizona 2020-06-16 2020-06-30 0.9864 0.07143 0.09253 0.00000 0.05641
## 6 Arizona 2020-07-01 2020-07-15 0.9732 0.07143 0.06093 0.00000 0.00000
## 7 California 2020-04-16 2020-04-30 0.8574 0.07143 0.07211 0.07752 0.02728
## 8 California 2020-05-01 2020-05-15 0.9481 0.07143 0.02832 1.00000 0.01925
## 9 California 2020-05-16 2020-05-30 0.9598 0.07143 0.04972 0.40540 0.03497
## 10 California 2020-06-01 2020-06-15 1.1270 0.07143 0.10206 0.01176 0.03073
## omega miu cases deaths recovered POP_LATITUDE POP_LONGITUDE
## 1 0.0027505 0.038429 4234 150 460 33.58 -111.5
## 2 0.0030913 0.017520 7962 330 1528 33.58 -111.5
## 3 0.0014293 0.033751 13631 679 3357 33.58 -111.5
## 4 0.0013712 0.036006 20123 917 4869 33.58 -111.5
## 5 0.0006979 0.013973 39097 1219 6598 33.58 -111.5
## 6 0.0000000 0.003805 84092 1720 9715 33.58 -111.5
## 7 0.0023628 0.007923 28035 970 1753 37.82 -120.9
## 8 0.0013106 0.006909 52152 2131 5130 37.82 -120.9
## 9 0.0009157 0.012128 78704 3207 9098 37.82 -120.9
## 10 0.0006310 0.012709 115032 4219 17585 37.82 -120.9
## PopulationEstimate2018 PopTotalMale2017 PopulationEstimate_above65_2017
## 1 478110 232553 80116
## 2 478110 232553 80116
## 3 478110 232553 80116
## 4 478110 232553 80116
## 5 478110 232553 80116
## 6 478110 232553 80116
## 7 682018 338751 94920
## 8 682018 338751 94920
## 9 682018 338751 94920
## 10 682018 338751 94920
## PopulationDensityperSqMile2010 DiabetesPercentage Smokers_Percentage
## 1 52.05 10.060 16.48
## 2 52.05 10.060 16.48
## 3 52.05 10.060 16.48
## 4 52.05 10.060 16.48
## 5 52.05 10.060 16.48
## 6 52.05 10.060 16.48
## 7 663.26 8.505 12.09
## 8 663.26 8.505 12.09
## 9 663.26 8.505 12.09
## 10 663.26 8.505 12.09
## HeartDiseaseMortality StrokeMortality Hospitals ICU_beds
## 1 148.8 30.90 5.067 103.9
## 2 148.8 30.90 5.067 103.9
## 3 148.8 30.90 5.067 103.9
## 4 148.8 30.90 5.067 103.9
## 5 148.8 30.90 5.067 103.9
## 6 148.8 30.90 5.067 103.9
## 7 153.9 37.89 5.672 126.5
## 8 153.9 37.89 5.672 126.5
## 9 153.9 37.89 5.672 126.5
## 10 153.9 37.89 5.672 126.5
## HospParticipatinginNetwork2017 stay_at_home above_50_gatherings
## 1 1.800 12 2
## 2 1.800 12 2
## 3 1.800 12 2
## 4 1.800 12 2
## 5 1.800 12 2
## 6 1.800 12 2
## 7 1.845 0 4
## 8 1.845 0 4
## 9 1.845 0 4
## 10 1.845 0 4
## above_500_gatherings restaurant_dine_in entertainment_gym days
## 1 6 7 7 14
## 2 6 7 7 28
## 3 6 7 7 42
## 4 6 7 7 56
## 5 6 7 7 70
## 6 6 7 7 84
## 7 8 3 3 14
## 8 8 3 3 28
## 9 8 3 3 42
## 10 8 3 3 56
model_fit_out = data.frame()
write.csv(model_fit_out, file = "model_fit_out.csv")
What we have achieved is a simple, interpretable, accessible model that anyone can get their hands on. It not only works towards assisting the prediction and prevention of pandemic, but also gives us clues on the relationship between variable social factors and the vulnerability of one place to the virus.
This is our very first step to make our own contribution to the control of the pandemic. We believe that everyone can take a step to do their parts, and we need not be freaked out by the technological obstacles and the already-done achievements by the “professionals”.
My partner and I are both undergraduates in non-CS majors, and this is our first time touching AWS or any other cloud service system. We started everything offline, accomplishing every tasks locally, and it did take us a while to figure out where to incorporate all those into AWS. But soon we see the enriched potentials and capability of AWS. Due to the limited time, we only took a slight bite of the AWS system, but we will be working to dig deeper.
The next step to take is to integrate everything into an efficient cloud computing system so that larger data and more sophisticated models could be handled, especially in a well-automated manner.
There are interested stuffs like the political factor. We may wanna see how it affects We have learned a lot from this experience and we are looking forwards to becoming professional data analytist…blabla
pairs() Plotkmod_k_1 = lm(k ~ days, data = data_unamed)
mod_k_2 = lm(k ~ . - k, data = data_unamed)
summary(mod_k_2)
##
## Call:
## lm(formula = k ~ . - k, data = data_unamed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5410 -0.0388 0.0132 0.0491 0.2173
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.30e+01 3.49e+00 3.73 0.00052 ***
## sigma NA NA NA NA
## lamda -5.83e+00 1.14e+00 -5.13 5.6e-06 ***
## c -2.86e-01 6.77e-02 -4.22 0.00011 ***
## alpha 2.46e+00 9.56e-01 2.58 0.01317 *
## omega 3.12e+01 2.43e+01 1.29 0.20429
## miu 3.73e+00 1.91e+00 1.96 0.05664 .
## cases 1.72e-07 1.55e-06 0.11 0.91257
## deaths 1.29e-05 1.81e-05 0.71 0.47941
## recovered -4.39e-06 5.96e-06 -0.74 0.46514
## POP_LATITUDE -1.59e-01 4.49e-02 -3.54 0.00093 ***
## POP_LONGITUDE 5.61e-02 1.50e-02 3.73 0.00052 ***
## PopulationEstimate2018 -8.87e-05 2.56e-05 -3.46 0.00118 **
## PopTotalMale2017 1.68e-04 4.94e-05 3.41 0.00137 **
## PopulationEstimate_above65_2017 5.73e-05 1.57e-05 3.65 0.00067 ***
## PopulationDensityperSqMile2010 -6.50e-06 1.32e-04 -0.05 0.96088
## DiabetesPercentage -6.81e-01 1.80e-01 -3.79 0.00043 ***
## Smokers_Percentage 2.70e-01 7.69e-02 3.51 0.00101 **
## HeartDiseaseMortality -2.32e-02 8.51e-03 -2.72 0.00907 **
## StrokeMortality 1.33e-01 4.09e-02 3.25 0.00218 **
## Hospitals NA NA NA NA
## ICU_beds NA NA NA NA
## HospParticipatinginNetwork2017 NA NA NA NA
## stay_at_home NA NA NA NA
## above_50_gatherings NA NA NA NA
## above_500_gatherings NA NA NA NA
## restaurant_dine_in NA NA NA NA
## entertainment_gym NA NA NA NA
## days 3.53e-03 1.35e-03 2.61 0.01204 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.135 on 46 degrees of freedom
## Multiple R-squared: 0.598, Adjusted R-squared: 0.432
## F-statistic: 3.61 on 19 and 46 DF, p-value: 0.000191
sigma# mod_sigma =
lambda# mod_lambda =
c# mod_c =
alpha# mod_alpha =
omega# mod_omega =
miu# mod_miu =
get_bp_decision = function(model, alpha) {
decide = unname(bptest(model)$p.value < alpha)
ifelse(decide, "Reject", "Fail to Reject")
}
get_bp_pval = function(model) {
bptest(model)$p.value
}
get_sw_decision = function(model, alpha) {
decide = unname(shapiro.test(resid(model))$p.value < alpha)
ifelse(decide, "Reject", "Fail to Reject")
}
get_sw_pval = function(model) {
shapiro.test(resid(model))$p.value
}
get_num_params = function(model) {
length(coef(model))
}
get_loocv_rmse = function(model, is_log) {
ifelse(
is_log,
sqrt(mean(na.omit(((dat_trn$price - exp(fitted(model))) / (1 - hatvalues(model))) ^ 2))),
sqrt(mean((resid(model) / (1 - hatvalues(model))) ^ 2))
)
}
get_adj_r2 = function(model) {
summary(model)$adj.r.squared
}
test_mod = function(model, is_log = FALSE){
c(loocv_rmse = get_loocv_rmse(model, is_log),
adj_r2 = get_adj_r2(model),
bp_pval = get_bp_pval(model),
sw_pval = get_sw_pval(model),
num_params = get_num_params(model))
}
diagnostics = function(model, pcol = "grey", lcol = "dodgerblue", alpha = 0.05, plotit = TRUE, testit = TRUE){
if (plotit){
par(mfrow = c(1, 2), pty="s")
plot(fitted(model), resid(model), col = "grey", pch = 20,
xlab = "Fitted", ylab = "Residual",
main = "Fitted versus Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
qqnorm(resid(model), col = pcol)
qqline(resid(model), col = lcol, lwd = 2)
}
if (testit){
list(p_val = shapiro.test(resid(model))$p,
decision = ifelse(test = shapiro.test(resid(model))$p < alpha,
yes = "Reject", no = "Fail to Reject"))
}
}
get_test_rmse = function(model) {
sqrt(mean((dat_tst$price - predict(model, newdata = dat_tst))^ 2))
}
get_perc_err = function(model) {
actual = dat_tst$price
predicted = predict(model, dat_tst)
100 * mean((abs(actual - predicted)) / actual)
}